Accurate Classification of Cardiomyopathy Diagnosis by Chromatin Accessibility

Author

Jialei Duan

Published

Thu Sep 22, 2022 23:59:37-05:00

Doi
Abstract
This study establishes proof-of-concept for a cardiomyopathy diagnostic algorithm using chromatin accessibility signatures at a sequencing depth achievable by benchtop instruments.


[1] "2022-09-22 23:58:51 CDT"
[1] "America/Chicago"

Preparation

PROJECT_DIR <- file.path(
    "/Users/jialei/Dropbox/Data/Projects/UTSW",
    "/Cardiomyopathy/atac-seq"
)

Functions

Load required packages.

library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.2.9000 ──
## ✔ ggplot2   3.3.6.9000        ✔ dplyr     1.0.99.9000  
## ✔ tibble    3.1.8.9001        ✔ stringr   1.4.1.9000   
## ✔ tidyr     1.2.1.9000        ✔ forcats   0.5.2.9000   
## ✔ readr     2.1.2.9000        ✔ lubridate 1.8.0.9000   
## ✔ purrr     0.9000.0.9000     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(Matrix)
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(patchwork)
library(extrafont)
## Registering fonts with R
source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)
`%+replace%` <- ggplot2::`%+replace%`

Python env

np <- reticulate::import("numpy", convert = TRUE)
cat("numpy version:", np$`__version__`, "\n")
numpy version: 1.22.4 
reticulate::py_config()
python:         /Users/jialei/.pyenv/shims/python
libpython:      /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/libpython3.9.dylib
pythonhome:     /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10:/Users/jialei/.pyenv/versions/mambaforge-4.10.3-10
version:        3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:00:33)  [Clang 13.0.1 ]
numpy:          /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/python3.9/site-packages/numpy
numpy_version:  1.22.4
numpy:          /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/python3.9/site-packages/numpy

NOTE: Python version was forced by RETICULATE_PYTHON

Differentially accessible regions

# define function
compute_diff_peaks <- function(matrix,
                               group_a,
                               group_b,
                               num_thread = 4) {
    cat(length(group_a), "\n")
    cat(length(group_b), "\n")

    cts <- matrix[, c(
        group_a,
        group_b
    )]
    cat(ncol(cts), "\n")

    col_data <- data.frame(
        sample = c(group_a, group_b),
        category = c(
            rep("group_a", length(group_a)),
            rep("group_b", length(group_b))
        )
    )

    BiocParallel::register(BiocParallel::MulticoreParam(num_thread))

    dds <- DESeq2::DESeqDataSetFromMatrix(
        countData = cts,
        colData = col_data,
        design = ~category
    )

    dds <- DESeq2::DESeq(dds)
    res <- DESeq2::results(dds)
    DESeq2::summary(res)

    return(res)
}
# sample metadata
samples_fresh_healthy <- c(
    "F1_1", "F1_2", "F2_1", "F2_2",
    "F5_1", "F5_2", "P3_1", "P3_2",
    "P5_1", "P5_2", "P6_1", "P6_2"
)

samples_fresh_icm <- c(
    "P104a_1", "P104a_2", "P117b_1", "P117b_2",
    "P123b_1", "P123b_2", "P131a_1", "P131a_2",
    "P92a_1", "P92a_2"
)

samples_fresh_nicm <- c(
    "P114b_1", "P114b_2", "P59a_1", "P59a_2",
    "P60a_1", "P60a_2", "P73a_1", "P73a_2",
    "P87a_1", "P87a_2"
)

samples_fresh_hcm <- c(
    "HOCM4_1", "HOCM4_2", "HOCM6_1", "HOCM6_2",
    "HOCM9_1", "HOCM9_2", "HOCM11_1", "HOCM11_2",
    "HOCM7_1", "HOCM7_2"
)
samples <- list(
    fresh_healthy = samples_fresh_healthy,
    fresh_icm = samples_fresh_icm,
    fresh_nicm = samples_fresh_nicm,
    fresh_hcm = samples_fresh_hcm
)

Pairwise comparison

samples_combn <- combinat::combn(x = names(samples), m = 2)

diff_peaks <- purrr::map(seq_len(ncol(samples_combn)), \(x) {
    sample_names_a <- samples[[samples_combn[, x][1]]]
    sample_names_b <- samples[[samples_combn[, x][2]]]

    cat(length(sample_names_a), "\n")
    cat(sample_names_a, "\n")
    cat(length(sample_names_b), "\n")
    cat(sample_names_b, "\n")

    compute_diff_peaks(
        matrix = matrix_readcount_use,
        group_a = sample_names_a,
        group_b = sample_names_b,
        num_thread = parallel::detectCores() - 1
    )
})
12 
F1_1 F1_2 F2_1 F2_2 F5_1 F5_2 P3_1 P3_2 P5_1 P5_2 P6_1 P6_2 
10 
P104a_1 P104a_2 P117b_1 P117b_2 P123b_1 P123b_2 P131a_1 P131a_2 P92a_1 P92a_2 
12 
10 
22 
Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing

out of 206017 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 20685, 10%
LFC < 0 (down)     : 16920, 8.2%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

12 
F1_1 F1_2 F2_1 F2_2 F5_1 F5_2 P3_1 P3_2 P5_1 P5_2 P6_1 P6_2 
10 
P114b_1 P114b_2 P59a_1 P59a_2 P60a_1 P60a_2 P73a_1 P73a_2 P87a_1 P87a_2 
12 
10 
22 
Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 3 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing

out of 206019 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 7721, 3.7%
LFC < 0 (down)     : 4264, 2.1%
outliers [1]       : 0, 0%
low counts [2]     : 31954, 16%
(mean count < 5)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

12 
F1_1 F1_2 F2_1 F2_2 F5_1 F5_2 P3_1 P3_2 P5_1 P5_2 P6_1 P6_2 
10 
HOCM4_1 HOCM4_2 HOCM6_1 HOCM6_2 HOCM9_1 HOCM9_2 HOCM11_1 HOCM11_2 HOCM7_1 HOCM7_2 
12 
10 
22 
Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 4 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing

out of 206019 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 3112, 1.5%
LFC < 0 (down)     : 3829, 1.9%
outliers [1]       : 0, 0%
low counts [2]     : 23966, 12%
(mean count < 4)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

10 
P104a_1 P104a_2 P117b_1 P117b_2 P123b_1 P123b_2 P131a_1 P131a_2 P92a_1 P92a_2 
10 
P114b_1 P114b_2 P59a_1 P59a_2 P60a_1 P60a_2 P73a_1 P73a_2 P87a_1 P87a_2 
10 
10 
20 
Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 3 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing

out of 206018 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 7031, 3.4%
LFC < 0 (down)     : 12331, 6%
outliers [1]       : 0, 0%
low counts [2]     : 59914, 29%
(mean count < 5)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

10 
P104a_1 P104a_2 P117b_1 P117b_2 P123b_1 P123b_2 P131a_1 P131a_2 P92a_1 P92a_2 
10 
HOCM4_1 HOCM4_2 HOCM6_1 HOCM6_2 HOCM9_1 HOCM9_2 HOCM11_1 HOCM11_2 HOCM7_1 HOCM7_2 
10 
10 
20 
Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 5 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing

out of 206019 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 9286, 4.5%
LFC < 0 (down)     : 15191, 7.4%
outliers [1]       : 0, 0%
low counts [2]     : 39943, 19%
(mean count < 5)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

10 
P114b_1 P114b_2 P59a_1 P59a_2 P60a_1 P60a_2 P73a_1 P73a_2 P87a_1 P87a_2 
10 
HOCM4_1 HOCM4_2 HOCM6_1 HOCM6_2 HOCM9_1 HOCM9_2 HOCM11_1 HOCM11_2 HOCM7_1 HOCM7_2 
10 
10 
20 
Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 5 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing

out of 206018 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 3799, 1.8%
LFC < 0 (down)     : 7798, 3.8%
outliers [1]       : 0, 0%
low counts [2]     : 79885, 39%
(mean count < 7)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
names(diff_peaks) <- purrr::map_chr(
    seq_len(ncol(samples_combn)), \(x) {
        paste(samples_combn[, x], collapse = "_vs_")
    }
)
FC_THRESHOLD <- 1
PADJ_THRESHOLD <- 0.01

features_selected <- purrr::map(diff_peaks, \(x) {
    x |>
        as.data.frame() |>
        dplyr::filter(
            padj < PADJ_THRESHOLD,
            abs(log2FoldChange) >= FC_THRESHOLD
        ) |>
        tibble::rownames_to_column(var = "feature") |>
        pull(feature)
}) |>
    unlist() |>
    unique()
Loading required package: DESeq2
Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics

Attaching package: 'BiocGenerics'
The following objects are masked from 'package:lubridate':

    intersect, setdiff, union
The following objects are masked from 'package:dplyr':

    combine, intersect, setdiff, union
The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs
The following objects are masked from 'package:base':

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    union, unique, unsplit, which.max, which.min

Attaching package: 'S4Vectors'
The following objects are masked from 'package:Matrix':

    expand, unname
The following objects are masked from 'package:lubridate':

    second, second<-
The following objects are masked from 'package:dplyr':

    first, rename
The following object is masked from 'package:tidyr':

    expand
The following objects are masked from 'package:base':

    expand.grid, I, unname
Loading required package: IRanges

Attaching package: 'IRanges'
The following object is masked from 'package:lubridate':

    %within%
The following objects are masked from 'package:dplyr':

    collapse, desc, slice
The following object is masked from 'package:purrr':

    reduce
Loading required package: GenomicRanges
Loading required package: GenomeInfoDb
Loading required package: SummarizedExperiment
Loading required package: MatrixGenerics
Loading required package: matrixStats

Attaching package: 'matrixStats'
The following object is masked from 'package:dplyr':

    count

Attaching package: 'MatrixGenerics'
The following objects are masked from 'package:matrixStats':

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
    rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
    rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
    rowWeightedSds, rowWeightedVars
Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Attaching package: 'Biobase'
The following object is masked from 'package:MatrixGenerics':

    rowMedians
The following objects are masked from 'package:matrixStats':

    anyMissing, rowMedians
features_selected |> length()
[1] 5753
# sanity check
stopifnot(
    length(features_selected) == 5753
)

Healthy vs. disease

diff_peaks <- compute_diff_peaks(
    matrix = matrix_readcount_use,
    group_a = samples_fresh_healthy,
    group_b = c(
        samples_fresh_icm,
        samples_fresh_nicm,
        samples_fresh_hcm
    ),
    num_thread = parallel::detectCores() - 1
)
12 
30 
42 

out of 206019 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 14884, 7.2%
LFC < 0 (down)     : 15309, 7.4%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
FC_THRESHOLD <- 1
PADJ_THRESHOLD <- 0.05
diff_peaks |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "feature") |>
    dplyr::mutate(
        da = dplyr::case_when(
            (
                (
                    abs(log2FoldChange) >= FC_THRESHOLD
                ) & (
                    padj <= PADJ_THRESHOLD
                )
            ) ~ 1,
            TRUE ~ 0
        ),
        da = as.factor(da)
    ) |>
    dplyr::arrange(da) |>
    dplyr::mutate(
        category = "Healthy vs Disease"
    ) |>
    {
        \(x)
        x |>
            ggplot2::ggplot(
                ggplot2::aes(
                    x = log2(baseMean),
                    y = log2FoldChange,
                    color = da
                )
            ) +
            ggrastr::rasterise(
                ggplot2::geom_point(
                    size = 0.4,
                    alpha = 1,
                    stroke = 0, shape = 16
                ),
                dpi = 900,
                dev = "ragg_png"
            ) +
            ggplot2::geom_density_2d(
                color = "steelblue", size = 0.2
            ) +
            ggplot2::scale_color_manual(
                values = c("grey65", "salmon"), guide = "none"
            ) +
            ggplot2::facet_wrap(
                ggplot2::vars(category),
                nrow = 1,
                strip.position = "top",
            ) +
            ggplot2::labs(
                x = expression(paste("Log"[2], " mean")),
                y = expression(paste("Log"[2], " (fold change)"))
            ) +
            ggplot2::geom_text(
                data = x |>
                    dplyr::count(da),
                ggplot2::aes(label = paste("Num:", n)),
                x = -5,
                y = 5,
                color = "salmon",
                fontface = "bold",
                size = 6 / ggplot2::.pt,
                hjust = 0,
                vjust = 1
            ) +
            ggplot2::theme_light() %+replace%
            ggplot2::theme(
                axis.title = ggplot2::element_text(
                    family = "Arial", size = 7
                ),
                axis.text = ggplot2::element_text(
                    family = "Arial", size = 7
                ),
                panel.background = ggplot2::element_blank(),
                strip.text = ggplot2::element_text(
                    family = "Arial",
                    size = 7,
                    margin = ggplot2::margin(
                        t = 4.4, r = 4.4, b = 4.4, l = 4.4, unit = "pt"
                    )
                )
            )
    }()

diff_peaks |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "feature") |>
    dplyr::mutate(
        da = dplyr::case_when(
            abs(log2FoldChange) >= FC_THRESHOLD & padj <= PADJ_THRESHOLD ~ 1,
            TRUE ~ 0
        ),
        da = as.factor(da)
    ) |>
    dplyr::arrange(da) |>
    dplyr::mutate(
        category = "Healthy vs Disease"
    ) |>
    head(n = 12)
           feature    baseMean log2FoldChange     lfcSE        stat     pvalue
1  1_181358_181567    6.285720    -0.25082657 0.2225674 -1.12696904 0.25975557
2  1_183716_183885    2.917794    -0.61104587 0.3185859 -1.91799391 0.05511178
3  1_183999_184321    5.587581    -0.40899805 0.2454201 -1.66652230 0.09560943
4  1_191239_191880   16.735046    -0.19449895 0.1594596 -1.21973776 0.22256430
5  1_267894_268128    6.533567     0.05967949 0.2321327  0.25709207 0.79710770
6  1_629834_630082  704.298690     0.03921131 0.1246965  0.31445402 0.75317623
7  1_633881_634177 1670.223559    -0.10947573 0.1252210 -0.87426032 0.38197650
8  1_778297_779354   78.566315     0.01854268 0.1195157  0.15514849 0.87670426
9  1_794936_795259    7.667010    -0.44843135 0.2054008 -2.18320186 0.02902095
10 1_816789_817532   15.907760    -0.26947734 0.1423673 -1.89283152 0.05838028
11 1_818925_819284    7.305783     0.20719291 0.2135904  0.97004803 0.33202255
12 1_819939_820383    8.225880     0.01173971 0.2131140  0.05508655 0.95606951
        padj da           category
1  0.5057420  0 Healthy vs Disease
2  0.2128599  0 Healthy vs Disease
3  0.2896372  0 Healthy vs Disease
4  0.4649598  0 Healthy vs Disease
5  0.9045374  0 Healthy vs Disease
6  0.8802030  0 Healthy vs Disease
7  0.6229343  0 Healthy vs Disease
8  0.9438932  0 Healthy vs Disease
9  0.1480275  0 Healthy vs Disease
10 0.2197918  0 Healthy vs Disease
11 0.5774971  0 Healthy vs Disease
12 0.9813831  0 Healthy vs Disease

Heatmap

RASTERISED <- FALSE
matrix_cpm_use <- t(
    t(matrix_readcount_use) / colSums(matrix_readcount_use)
) * 1e+06

matrix_heatmap <- matrix_cpm_use[features_selected, unlist(samples)]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]

matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))

heatmap_limits <- quantile(matrix_heatmap, c(0.05, 0.95))
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]
# hierarchical clustering
hclust_out_features <- hclust(
    dist(matrix_heatmap, method = "euclidean"),
    method = "complete"
)

NUM_CENTERS <- 4
hclust_out_features_df <- cutree(
    tree = hclust_out_features, k = NUM_CENTERS
) |>
    tibble::as_tibble(rownames = "feature") |>
    dplyr::mutate(order = hclust_out_features$order) |>
    dplyr::rename(hclust_group = value)

hclust_out_features_df <- purrr::map2(
    c(2, 1, 4, 3), seq_len(NUM_CENTERS), \(x, y) {
        hclust_out_features_df |>
            dplyr::filter(hclust_group == x) |>
            dplyr::arrange(order) |>
            dplyr::mutate(group = y)
    }
) |>
    dplyr::bind_rows()
# heatmap column annotation
ha_group <- colnames(matrix_heatmap) |>
    tibble::enframe(value = "sample") |>
    dplyr::left_join(
        purrr::map(names(samples), \(x) {
            data.frame(
                sample = samples[[x]],
                group = x
            )
        }) |>
            dplyr::bind_rows(),
        by = "sample"
    ) |>
    dplyr::pull(group) |>
    stringr::str_to_title()

ha_column <- ComplexHeatmap::HeatmapAnnotation(
    group = ComplexHeatmap::anno_simple(
        ha_group,
        col = setNames(
            object = as.character(yarrr::piratepal(palette = "google")),
            nm = names(samples) |>
                stringr::str_to_title()
        ),
        which = "column",
        pt_size = grid::unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = grid::unit(1.5, "mm")
    ),
    #
    show_annotation_name = FALSE,
    annotation_label = c(
        "Group"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    annotation_name_side = "left"
)
# heatmap row annotation
ha_region <- hclust_out_features_df |>
    pull(group) |>
    {
        \(x) paste("Signature", x)
    }()

ha_left <- ComplexHeatmap::HeatmapAnnotation(
    region = ComplexHeatmap::anno_simple(
        ha_region,
        col = setNames(
            object = as.character(yarrr::piratepal(palette = "google")),
            nm = sort(unique(ha_region))
        ),
        which = "row",
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = grid::unit(1.5, "mm")
    ),
    which = "row",
    show_annotation_name = FALSE,
    annotation_label = c(
        "Region"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6)
)
sample_ids <- tibble::tribble(
    ~sample_id, ~sample_id_publication,
    "F1_1", "C1_1",
    "F1_2", "C1_2",
    "F2_1", "C2_1",
    "F2_2", "C2_2",
    "F5_1", "C3_1",
    "F5_2", "C3_2",
    "P3_1", "C4_1",
    "P3_2", "C4_2",
    "P5_1", "C5_1",
    "P5_2", "C5_2",
    "P6_1", "C6_1",
    "P6_2", "C6_2",
    "P104a_1", "I1_1",
    "P104a_2", "I1_2",
    "P117b_1", "I2_1",
    "P117b_2", "I2_2",
    "P123b_1", "I3_1",
    "P123b_2", "I3_2",
    "P131a_1", "I4_1",
    "P131a_2", "I4_2",
    "P92a_1", "I5_1",
    "P92a_2", "I5_2",
    "P114b_1", "NI1_1",
    "P114b_2", "NI1_2",
    "P59a_1", "NI2_1",
    "P59a_2", "NI2_2",
    "P60a_1", "NI3_1",
    "P60a_2", "NI3_2",
    "P73a_1", "NI4_1",
    "P73a_2", "NI4_2",
    "P87a_1", "NI5_1",
    "P87a_2", "NI5_2",
    "P75a_1", "U1_1",
    "P75a_2", "U1_2",
    "P115b_1", "U2_1",
    "P115b_2", "U2_2",
    "P141a_1", "U3_1",
    "P141a_2", "U3_2",
    "HOCM4_1", "H1_1",
    "HOCM4_2", "H1_2",
    "HOCM6_1", "H2_1",
    "HOCM6_2", "H2_2",
    "HOCM9_1", "H3_1",
    "HOCM9_2", "H3_2",
    "HOCM11_1", "H4_1",
    "HOCM11_2", "H4_2",
    "HOCM7_1", "H5_1",
    "HOCM7_2", "H5_2",
    "MYEC4_2", "U6_2",
    "P108b_1", "U4_1",
    "P108b_2", "U4_2"
)

sample_ids <- setNames(
    object = sample_ids$sample_id_publication,
    nm = sample_ids$sample_id
)
# heatmap
ht <- ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap |> as.matrix(),
    rect_gp = grid::gpar(col = NA, lwd = 0),
    col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_rot = 0,
    #
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    cluster_columns = FALSE,
    show_column_dend = FALSE,
    #
    show_row_names = FALSE,
    show_column_names = TRUE,
    column_labels = sample_ids[colnames(matrix_heatmap)],
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    #
    top_annotation = ha_column,
    bottom_annotation = NULL,
    left_annotation = ha_left,
    #
    column_split = ha_group |>
        tibble::enframe() |>
        dplyr::mutate(
            value = stringr::str_remove(
                string = value, pattern = "Fresh_"
            ),
            value = case_when(
                value == "healthy" ~ "Healthy",
                TRUE ~ stringr::str_to_upper(value)
            )
        ) |>
        dplyr::pull(value) |>
        factor(
            levels = c("Healthy", "ICM", "NICM", "HCM")
        ),
    column_gap = grid::unit(0, "mm"),
    #
    show_heatmap_legend = TRUE,
    heatmap_legend_param = list(
        title = "Z score",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = grid::unit(12, "mm"),
        legend_width = grid::unit(4, "mm")
    ),
    #
    use_raster = RASTERISED
)
# legend
lgd_group <- ComplexHeatmap::Legend(
    title = "Group",
    labels = ha_group |> unique(),
    legend_gp = grid::gpar(
        fill = setNames(
            object = as.character(yarrr::piratepal(palette = "google")),
            nm = names(samples) |>
                stringr::str_remove(pattern = "fresh_") |>
                stringr::str_to_title()
        )
    ),
    labels_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    ),
    title_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    )
)

pd <- ComplexHeatmap::packLegend(
    lgd_group,
    direction = "vertical"
)
# draw
ComplexHeatmap::draw(
    ht,
    heatmap_legend_list = list(pd)
)

GO enrichment

Code
enriched_go |>
    dplyr::mutate(
        category = factor(
            category,
            levels = c(
                "fresh_healthy",
                "fresh_icm",
                "fresh_nicm",
                "fresh_hcm"
            )
        ),
        rank = factor(rank, levels = 5:1)
    ) |>
    ggplot2::ggplot(
        ggplot2::aes(
            x = p_value_log,
            y = rank,
            fill = category,
        )
    ) +
    ggplot2::geom_bar(stat = "identity") +
    ggplot2::facet_wrap(
        ggplot2::vars(category),
        ncol = 1,
        strip.position = "left",
        scales = "free_x",
        labeller = ggplot2::labeller(
            category = c(
                "fresh_healthy" = "Healthy",
                "fresh_icm" = "ICM",
                "fresh_nicm" = "NICM",
                "fresh_hcm" = "HCM"
            )
        )
    ) +
    ggplot2::geom_text(
        ggplot2::aes(
            x = 0,
            label = term,
            group = NULL
        ),
        size = 6 / ggplot2::.pt,
        family = "Arial",
        color = "black",
        data = enriched_go |>
            dplyr::mutate(
                term = stringr::str_replace(
                    string = term,
                    pattern = "organization",
                    replacement = "org."
                ),
                category = factor(
                    category,
                    levels = c(
                        "fresh_healthy",
                        "fresh_icm",
                        "fresh_nicm",
                        "fresh_hcm"
                    )
                ),
                rank = factor(rank, levels = 5:1),
                term = paste(" ", term)
            ),
        hjust = 0
    ) +
    ggplot2::scale_x_continuous(
        name = expression(paste("-log"[10], " p")),
        labels = scales::math_format(10^.x)
    ) +
    ggplot2::scale_y_discrete(name = NULL) +
    ggplot2::scale_fill_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    ggplot2::guides(fill = "none") +
    ggplot2::theme_light() %+replace%
    ggplot2::theme(
        axis.title = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_text(family = "Arial", size = 7),
        axis.text.y = ggplot2::element_blank(),
        axis.ticks.y = ggplot2::element_blank(),
        panel.background = ggplot2::element_blank(),
        strip.text = ggplot2::element_text(
            family = "Arial", size = 7,
            margin = ggplot2::margin(
                t = 4.4, r = 4.4, b = 4.4, l = 4.4, unit = "pt"
            )
        )
    )

enriched_go |> knitr::kable()
category rank go_id term p_value p_value_log
fresh_healthy 1 GO:0009653 anatomical structure morphogenesis 0.0e+00 13.552842
fresh_healthy 2 GO:0048468 cell development 0.0e+00 12.721246
fresh_healthy 3 GO:0006928 movement of cell or subcellular component 0.0e+00 11.657577
fresh_healthy 4 GO:0023052 signaling 0.0e+00 10.886057
fresh_icm 1 GO:0030029 actin filament-based process 0.0e+00 22.136677
fresh_icm 2 GO:0003012 muscle system process 0.0e+00 21.585027
fresh_icm 3 GO:0006936 muscle contraction 0.0e+00 18.397940
fresh_icm 4 GO:0061061 muscle structure development 0.0e+00 18.251812
fresh_nicm 1 GO:0007155 cell adhesion 0.0e+00 10.657577
fresh_nicm 2 GO:0007275 multicellular organism development 0.0e+00 9.602060
fresh_nicm 3 GO:0016477 cell migration 0.0e+00 9.229148
fresh_nicm 4 GO:0120036 plasma membrane bounded cell projection organization 1.0e-07 6.853872
fresh_hcm 1 GO:0034332 adherens junction organization 5.3e-05 4.275724
fresh_hcm 2 GO:0072073 kidney epithelium development 2.4e-04 3.619789
fresh_hcm 3 GO:0034330 cell junction organization 3.0e-04 3.522879
fresh_hcm 4 GO:0032502 developmental process 3.4e-04 3.468521

Embedding

SEED <- 20220408

color_palette_group <- setNames(
    object = as.character(yarrr::piratepal(palette = "google")),
    nm = c("Healthy", "ICM", "NICM", "HCM")
)

matrix_cpm_use <- t(
    t(matrix_readcount_use) / colSums(matrix_readcount_use)
) * 1e+06
embedding <- purrr::map(
    list(rownames(matrix_cpm_use), features_selected), \(x) {
        matrix_umap <- matrix_cpm_use[x, unlist(samples)]
        matrix_umap <- matrix_umap[rowSums(matrix_umap) >= 30, ]
        matrix_umap <- log1p(matrix_umap)
        matrix_umap <- t(
            scale(t(matrix_umap), center = TRUE, scale = TRUE)
        )

        set.seed(seed = SEED)
        embedding_umap <- uwot::umap(
            X = t(matrix_umap),
            n_neighbors = 10,
            n_components = 2,
            metric = "euclidean",
            spread = 1,
            min_dist = 0.01,
            n_threads = 1,
            verbose = TRUE
        )
    }
) |>
    purrr::reduce(cbind) |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample") |>
    dplyr::rename(
        "x_umap1" = "V1",
        "y_umap1" = "V2",
        "x_umap2" = "V3",
        "y_umap2" = "V4"
    ) |>
    dplyr::mutate(
        category = case_when(
            sample %in% samples$fresh_healthy ~ "Healthy",
            sample %in% samples$fresh_icm ~ "ICM",
            sample %in% samples$fresh_nicm ~ "NICM",
            sample %in% samples$fresh_hcm ~ "HCM"
        ),
        category = factor(
            category,
            levels = c("Healthy", "ICM", "NICM", "HCM")
        )
    )
23:59:08 UMAP embedding parameters a = 1.896 b = 0.8006
23:59:08 Read 42 rows and found 205154 numeric columns
23:59:08 Using FNN for neighbor search, n_neighbors = 10
23:59:09 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 10
23:59:10 Initializing from normalized Laplacian + noise (using irlba)
23:59:10 Commencing optimization for 500 epochs, with 552 positive edges
23:59:10 Optimization finished
23:59:10 UMAP embedding parameters a = 1.896 b = 0.8006
23:59:10 Read 42 rows and found 5751 numeric columns
23:59:10 Using FNN for neighbor search, n_neighbors = 10
23:59:10 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 10
23:59:11 Initializing from normalized Laplacian + noise (using irlba)
23:59:11 Commencing optimization for 500 epochs, with 416 positive edges
23:59:11 Optimization finished
GEOM_POINT_SIZE <- 2
EMBEDDING_TITLE_PREFIX <- "UMAP"

purrr::map2(
    list("1", "2"),
    list(nrow(matrix_cpm_use), length(features_selected)), \(x, y) {
        plot_embedding(
            data = embedding[, paste0(c("x_umap", "y_umap"), x)],
            color = embedding$category |> as.factor(),
            label = glue::glue("{EMBEDDING_TITLE_PREFIX}; {y}"),
            label_position = NULL,
            show_color_value_labels = FALSE,
            show_color_legend = FALSE,
            geom_point_size = GEOM_POINT_SIZE,
            sort_values = FALSE,
            rasterise = FALSE
        ) +
            scale_color_manual(
                values = color_palette_group
            ) +
            theme_customized_embedding()
    }
) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

Prediction

N_REPLICATES <- 100
N_SAMPLES_TEST <- 2

AUC; multiple

76 M

matrix_rf_core_set <- matrix_cpm_use[
    rownames(matrix_cpm_use) %in% features_selected,
    unlist(samples)
] |>
    t() |>
    as.matrix() |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample") |>
    dplyr::mutate(
        group = case_when(
            sample %in% samples_fresh_healthy ~ "healthy",
            sample %in% samples_fresh_icm ~ "icm",
            sample %in% samples_fresh_nicm ~ "nicm",
            sample %in% samples_fresh_hcm ~ "hcm"
        ),
        group = factor(
            group,
            levels = c("healthy", "icm", "nicm", "hcm")
        )
    )

rownames(matrix_rf_core_set) <- matrix_rf_core_set$sample
matrix_rf_core_set$sample <- NULL
colnames(matrix_rf_core_set) <- make.names(
    colnames(matrix_rf_core_set)
)

# sanity check
stopifnot(
    class(matrix_rf_core_set) == "data.frame"
)
roc_data <- purrr::map(seq_len(N_REPLICATES), \(x) {
    # roc_data <- furrr::future_map(seq_len(N_REPLICATES), \(x) {
    samples_test <- purrr::map(samples, \(x) {
        sample(
            stringr::str_remove(
                string = x,
                pattern = "_[1|2]"
            ) |> unique(),
            N_SAMPLES_TEST
        )
    })

    samples_test <- unlist(samples)[unlist(samples) |>
        stringr::str_remove(
            pattern = "_[1|2]"
        ) %in% unlist(samples_test)]

    matrix_rf_core_set_test <- matrix_rf_core_set[samples_test, ]
    matrix_rf_core_set_train <- matrix_rf_core_set[
        !rownames(matrix_rf_core_set) %in% samples_test,
    ]

    rf_model <- randomForest::randomForest(
        group ~ .,
        data = matrix_rf_core_set_train[
            , !colnames(matrix_rf_core_set_train) %in% c("sample"),
        ],
        ntree = 1000
    )
    rf_prediction <- predict(
        rf_model,
        matrix_rf_core_set_test,
        type = "prob"
    )

    roc_rf <- pROC::multiclass.roc(
        matrix_rf_core_set_test$group,
        rf_prediction[, 2]
    )

    cat(
        nrow(matrix_rf_core_set_test),
        nrow(matrix_rf_core_set_train),
        pROC::auc(roc_rf),
        "\n"
    )

    return(roc_rf)
})
names(roc_data) <- seq_len(N_REPLICATES)
roc_data <- format_roc(roc_data = roc_data)
roc_averaged <- average_roc(roc_data = roc_data)

20 M

library_size <- tibble::tribble(
    ~sample, ~num_reads,
    "F1_1", 60964390,
    "F1_2", 57968864,
    "F2_1", 54158970,
    "F2_2", 143929746,
    "F5_1", 55528072,
    "F5_2", 64044782,
    "P3_1", 78506662,
    "P3_2", 80161416,
    "P5_1", 100592008,
    "P5_2", 99926658,
    "P6_1", 74427936,
    "P6_2", 74223738,
    "P104a_1", 61766652,
    "P104a_2", 71350414,
    "P117b_1", 66442460,
    "P117b_2", 76837402,
    "P123b_1", 80766906,
    "P123b_2", 82552378,
    "P131a_1", 88475442,
    "P131a_2", 68512592,
    "P92a_1", 99683410,
    "P92a_2", 74318972,
    "P114b_1", 69490016,
    "P114b_2", 74175524,
    "P59a_1", 72517888,
    "P59a_2", 75524788,
    "P60a_1", 75844218,
    "P60a_2", 77979036,
    "P73a_1", 109707158,
    "P73a_2", 81454894,
    "P87a_1", 68502630,
    "P87a_2", 52512570,
    "HOCM4_1", 63494724,
    "HOCM4_2", 64409436,
    "HOCM6_1", 64873814,
    "HOCM6_2", 80750200,
    "HOCM9_1", 81728322,
    "HOCM9_2", 74958418,
    "HOCM11_1", 65222662,
    "HOCM11_2", 77667354,
    "HOCM7_1", 69229542,
    "HOCM7_2", 80768964
)

library_size <- setNames(
    object = library_size$num_reads,
    nm = library_size$sample
)
TARGET_NUM_READS <- 20000000
roc_data <- purrr::map(seq_len(N_REPLICATES), \(x) {
    # roc_data <- furrr::future_map(seq_len(N_REPLICATES), \(x) {
    samples_test <- purrr::map(samples, \(x) {
        sample(
            stringr::str_remove(
                string = x,
                pattern = "_[1|2]"
            ) |> unique(),
            N_SAMPLES_TEST
        )
    })

    samples_test <- unlist(samples)[unlist(samples) |>
        stringr::str_remove(
            pattern = "_[1|2]"
        ) %in% unlist(samples_test)]

    matrix_rf_core_set_train <- matrix_rf_core_set[
        !rownames(matrix_rf_core_set) %in% samples_test,
    ]

    matrix_cpm_test <- downsample_matrix(
        matrix = matrix_readcount_use[, samples_test],
        proportion = TARGET_NUM_READS / library_size[samples_test],
        seed = SEED
    ) |>
        as.matrix() |>
        calc_cpm()

    matrix_rf_core_set_test <- matrix_cpm_test[
        rownames(matrix_cpm_test) %in% features_selected,
        unlist(samples_test)
    ] |>
        t() |>
        as.matrix() |>
        as.data.frame() |>
        tibble::rownames_to_column(var = "sample") |>
        dplyr::mutate(
            group = case_when(
                sample %in% samples_fresh_healthy ~ "healthy",
                sample %in% samples_fresh_icm ~ "icm",
                sample %in% samples_fresh_nicm ~ "nicm",
                sample %in% samples_fresh_hcm ~ "hcm"
            ),
            group = factor(
                group,
                levels = c("healthy", "icm", "nicm", "hcm")
            )
        )

    rownames(matrix_rf_core_set_test) <- matrix_rf_core_set_test$sample
    matrix_rf_core_set_test$sample <- NULL
    colnames(matrix_rf_core_set_test) <- make.names(
        colnames(matrix_rf_core_set_test)
    )

    class(matrix_rf_core_set_test)
    matrix_rf_core_set_test[1:5, 1:5]

    rf_model <- randomForest::randomForest(
        group ~ .,
        data = matrix_rf_core_set_train[
            ,
            !colnames(matrix_rf_core_set_train) %in% c("sample"),
        ],
        ntree = 1000
    )
    rf_prediction <- predict(
        rf_model,
        matrix_rf_core_set_test,
        type = "prob"
    )

    roc_rf <- pROC::multiclass.roc(
        matrix_rf_core_set_test$group, rf_prediction[, 2]
    )

    cat(
        nrow(matrix_rf_core_set_test),
        nrow(matrix_rf_core_set_train),
        pROC::auc(roc_rf),
        "\n"
    )

    return(roc_rf)
})
names(roc_data) <- seq_len(N_REPLICATES)
roc_data <- format_roc(roc_data = roc_data)
roc_averaged_downsampled <- average_roc(roc_data = roc_data)
plot_roc_curve_two(
    data = list(roc_averaged, roc_averaged_downsampled),
    label = c("76 M", "20 M")
)

Construct model

Build

matrix_rf_train <- matrix_cpm_use[
    rownames(matrix_cpm_use) %in% features_selected,
    c(
        samples_fresh_healthy,
        samples_fresh_icm,
        samples_fresh_nicm,
        samples_fresh_hcm
    )
]

matrix_rf_train <- matrix_rf_train |>
    t() |>
    as.matrix() |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample") |>
    dplyr::mutate(
        group = case_when(
            sample %in% samples_fresh_healthy ~ "healthy",
            sample %in% samples_fresh_icm ~ "icm",
            sample %in% samples_fresh_nicm ~ "nicm",
            sample %in% samples_fresh_hcm ~ "hcm"
        ),
        group = factor(
            group,
            levels = c("healthy", "icm", "nicm", "hcm")
        )
    )

rownames(matrix_rf_train) <- matrix_rf_train$sample
matrix_rf_train$sample <- NULL
colnames(matrix_rf_train) <- make.names(colnames(matrix_rf_train))

# sanity check
stopifnot(
    class(matrix_rf_train) == "data.frame"
)
set.seed(seed = SEED)
rf_model <- randomForest::randomForest(
    group ~ .,
    data = matrix_rf_train[
        ,
        !colnames(matrix_rf_train) %in% c("sample"),
    ],
    ntree = 1000
)

Predict

samples_selected <- tibble::tribble(
    ~sample, ~ratio,
    "P97a_1", 0.3677451197,
    "P97a_2", 0.396041047,
    "P141a_1", 0.208162124,
    "P141a_2", 0.312910588,
    "MYEC4_2", 0.320481892,
    "P108b_1", 0.239929499,
    "P108b_2", 0.321824834
)

matrix_readcount_p1 <- matrix_readcount_use[
    ,
    c("F1_1", "P92a_1", "P59a_1", "HOCM4_1")
]

matrix_readcount_p2 <- purrr::map(
    seq_len(nrow(samples_selected)), \(x) {
        downsample_matrix(
            matrix_readcount_use[
                , samples_selected[x, "sample", drop = TRUE],
                drop = FALSE
            ],
            proportion = c(samples_selected[x, "ratio", drop = TRUE])
        )
    }
) |>
    purrr::reduce(cbind)

matrix_rf_test <- calc_cpm(
    cbind(
        matrix_readcount_p1,
        matrix_readcount_p2
    )
)[
    rownames(matrix_readcount_use) %in% features_selected,
]

matrix_rf_test <- matrix_rf_test |>
    t() |>
    as.matrix() |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample")

rownames(matrix_rf_test) <- matrix_rf_test$sample
matrix_rf_test$sample <- NULL
colnames(matrix_rf_test) <- make.names(colnames(matrix_rf_test))

rf_prediction <- predict(rf_model, matrix_rf_test, type = "prob")

Visualize

Heatmap
# horizontal
ht <- ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap |> as.matrix() |> t(),
    col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    #
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    cluster_columns = FALSE,
    show_column_dend = FALSE,
    #
    show_row_names = TRUE,
    row_labels = colnames(matrix_heatmap),
    row_names_side = c("left"),
    row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    #
    show_column_names = TRUE,
    column_names_side = c("bottom"),
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_names_rot = 90,
    #
    show_heatmap_legend = TRUE,
    heatmap_legend_param = list(
        title = "Score",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = grid::unit(12.5, "mm"),
        legend_width = grid::unit(1, "mm")
    ),
)
# draw
ComplexHeatmap::draw(ht)

Radar plot
purrr::map(rownames(matrix_heatmap), \(x) {
    data <- matrix_heatmap[
        x, c("HCM     ", "Healthy", "     ICM", "NICM"),
        drop = FALSE
    ]
    data <- data * 100

    plot_radar(data)
}) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3)

R session info

devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.2.1 (2022-06-23)
 os       macOS Monterey 12.6
 system   aarch64, darwin21.6.0
 ui       unknown
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Chicago
 date     2022-09-22
 pandoc   2.19.2 @ /opt/homebrew/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package              * version       date (UTC) lib source
 annotate               1.74.0        2022-04-26 [1] Bioconductor
 AnnotationDbi          1.58.0        2022-04-26 [1] Bioconductor
 BayesFactor            0.9.12-4.4    2022-07-05 [1] CRAN (R 4.2.1)
 beeswarm               0.4.0         2021-06-01 [1] CRAN (R 4.2.0)
 Biobase              * 2.56.0        2022-04-26 [1] Bioconductor
 BiocGenerics         * 0.42.0        2022-04-26 [1] Bioconductor
 BiocParallel           1.30.3        2022-06-05 [1] Bioconductor
 Biostrings             2.64.1        2022-08-18 [1] Bioconductor
 bit                    4.0.4         2020-08-04 [1] CRAN (R 4.2.0)
 bit64                  4.0.5         2020-08-30 [1] CRAN (R 4.2.0)
 bitops                 1.0-7         2021-04-24 [1] CRAN (R 4.2.0)
 blob                   1.2.3         2022-04-10 [1] CRAN (R 4.2.0)
 cachem                 1.0.6         2021-08-19 [1] CRAN (R 4.2.0)
 Cairo                  1.6-0         2022-07-05 [1] CRAN (R 4.2.1)
 callr                  3.7.2         2022-08-22 [1] CRAN (R 4.2.1)
 circlize               0.4.15        2022-05-10 [1] CRAN (R 4.2.0)
 cli                    3.4.0         2022-09-08 [1] CRAN (R 4.2.1)
 clue                   0.3-61        2022-05-30 [1] CRAN (R 4.2.0)
 cluster                2.1.4         2022-08-22 [2] CRAN (R 4.2.1)
 coda                   0.19-4        2020-09-30 [1] CRAN (R 4.2.0)
 codetools              0.2-18        2020-11-04 [2] CRAN (R 4.2.1)
 colorspace             2.0-3         2022-02-21 [1] CRAN (R 4.2.0)
 ComplexHeatmap         2.12.1        2022-08-09 [1] Bioconductor
 crayon                 1.5.1         2022-03-26 [1] CRAN (R 4.2.0)
 DBI                    1.1.3         2022-06-18 [1] CRAN (R 4.2.0)
 DelayedArray           0.22.0        2022-04-26 [1] Bioconductor
 DESeq2               * 1.36.0        2022-04-26 [1] Bioconductor
 devtools               2.4.4.9000    2022-08-11 [1] Github (r-lib/devtools@c9696a6)
 digest                 0.6.29        2021-12-01 [1] CRAN (R 4.2.0)
 doParallel             1.0.17        2022-02-07 [1] CRAN (R 4.2.0)
 dplyr                * 1.0.99.9000   2022-09-21 [1] Github (tidyverse/dplyr@60f7a2d)
 ellipsis               0.3.2         2021-04-29 [1] CRAN (R 4.2.0)
 evaluate               0.16          2022-08-09 [1] CRAN (R 4.2.1)
 extrafont            * 0.18          2022-04-12 [1] CRAN (R 4.2.0)
 extrafontdb            1.0           2012-06-11 [1] CRAN (R 4.2.0)
 fansi                  1.0.3         2022-03-24 [1] CRAN (R 4.2.0)
 farver                 2.1.1         2022-07-06 [1] CRAN (R 4.2.1)
 fastmap                1.1.0         2021-01-25 [1] CRAN (R 4.2.0)
 FNN                    1.1.3.1       2022-05-23 [1] CRAN (R 4.2.0)
 forcats              * 0.5.2.9000    2022-08-20 [1] Github (tidyverse/forcats@bd319e0)
 foreach                1.5.2         2022-02-02 [1] CRAN (R 4.2.0)
 fs                     1.5.2.9000    2022-08-24 [1] Github (r-lib/fs@238032f)
 genefilter             1.78.0        2022-04-26 [1] Bioconductor
 geneplotter            1.74.0        2022-04-26 [1] Bioconductor
 generics               0.1.3         2022-07-05 [1] CRAN (R 4.2.1)
 GenomeInfoDb         * 1.32.4        2022-09-06 [1] Bioconductor
 GenomeInfoDbData       1.2.8         2022-04-22 [1] Bioconductor
 GenomicRanges        * 1.48.0        2022-04-26 [1] Bioconductor
 GetoptLong             1.0.5         2020-12-15 [1] CRAN (R 4.2.0)
 ggbeeswarm             0.6.0         2017-08-07 [1] CRAN (R 4.2.0)
 ggplot2              * 3.3.6.9000    2022-09-12 [1] Github (tidyverse/ggplot2@a58b48c)
 ggrastr                1.0.1         2022-09-12 [1] Github (VPetukhov/ggrastr@7aed9af)
 GlobalOptions          0.1.2         2020-06-10 [1] CRAN (R 4.2.0)
 glue                   1.6.2.9000    2022-04-22 [1] Github (tidyverse/glue@d47d6c7)
 gtable                 0.3.1.9000    2022-09-01 [1] Github (r-lib/gtable@c1a7a81)
 highr                  0.9           2021-04-16 [1] CRAN (R 4.2.0)
 hms                    1.1.2         2022-08-19 [1] CRAN (R 4.2.1)
 htmltools              0.5.3         2022-07-18 [1] CRAN (R 4.2.1)
 htmlwidgets            1.5.4         2022-08-23 [1] Github (ramnathv/htmlwidgets@400cf1a)
 httpuv                 1.6.6         2022-09-08 [1] CRAN (R 4.2.1)
 httr                   1.4.4         2022-08-17 [1] CRAN (R 4.2.1)
 IRanges              * 2.30.1        2022-08-18 [1] Bioconductor
 irlba                  2.3.5         2021-12-06 [1] CRAN (R 4.2.1)
 isoband                0.2.5         2021-07-13 [1] CRAN (R 4.2.0)
 iterators              1.0.14        2022-02-05 [1] CRAN (R 4.2.0)
 jpeg                   0.1-9         2021-07-24 [1] CRAN (R 4.2.0)
 jsonlite               1.8.0         2022-02-22 [1] CRAN (R 4.2.0)
 KEGGREST               1.36.3        2022-07-12 [1] Bioconductor
 knitr                  1.40          2022-08-24 [1] CRAN (R 4.2.1)
 labeling               0.4.2         2020-10-20 [1] CRAN (R 4.2.0)
 later                  1.3.0         2021-08-18 [1] CRAN (R 4.2.0)
 lattice                0.20-45       2021-09-22 [2] CRAN (R 4.2.1)
 lifecycle              1.0.2.9000    2022-09-09 [1] Github (r-lib/lifecycle@a2666fc)
 locfit                 1.5-9.6       2022-07-11 [1] CRAN (R 4.2.1)
 lubridate            * 1.8.0.9000    2022-05-24 [1] Github (tidyverse/lubridate@0bb49b2)
 magick                 2.7.3         2021-08-18 [1] CRAN (R 4.2.0)
 magrittr               2.0.3         2022-03-30 [1] CRAN (R 4.2.0)
 MASS                   7.3-58.1      2022-08-03 [2] CRAN (R 4.2.1)
 Matrix               * 1.5-1         2022-09-13 [1] CRAN (R 4.2.1)
 MatrixGenerics       * 1.8.1         2022-06-26 [1] Bioconductor
 MatrixModels           0.5-1         2022-09-11 [1] CRAN (R 4.2.1)
 matrixStats          * 0.62.0        2022-04-19 [1] CRAN (R 4.2.0)
 memoise                2.0.1         2021-11-26 [1] CRAN (R 4.2.0)
 mime                   0.12          2021-09-28 [1] CRAN (R 4.2.0)
 miniUI                 0.1.1.1       2018-05-18 [1] CRAN (R 4.2.0)
 munsell                0.5.0         2018-06-12 [1] CRAN (R 4.2.0)
 mvtnorm                1.1-3         2021-10-08 [1] CRAN (R 4.2.1)
 patchwork            * 1.1.2.9000    2022-08-20 [1] Github (thomasp85/patchwork@c14c960)
 pbapply                1.5-0         2021-09-16 [1] CRAN (R 4.2.0)
 pillar                 1.8.1         2022-08-19 [1] CRAN (R 4.2.1)
 pkgbuild               1.3.1         2021-12-20 [1] CRAN (R 4.2.0)
 pkgconfig              2.0.3         2019-09-22 [1] CRAN (R 4.2.0)
 pkgload                1.3.0         2022-06-27 [1] CRAN (R 4.2.1)
 plyr                   1.8.7         2022-03-24 [1] CRAN (R 4.2.0)
 png                    0.1-7         2013-12-03 [1] CRAN (R 4.2.0)
 prettyunits            1.1.1.9000    2022-04-22 [1] Github (r-lib/prettyunits@8706d89)
 pROC                   1.18.0        2021-09-03 [1] CRAN (R 4.2.0)
 processx               3.7.0         2022-07-07 [1] CRAN (R 4.2.1)
 profvis                0.3.7         2020-11-02 [1] CRAN (R 4.2.0)
 promises               1.2.0.1       2021-02-11 [1] CRAN (R 4.2.0)
 ps                     1.7.1         2022-06-18 [1] CRAN (R 4.2.0)
 purrr                * 0.9000.0.9000 2022-09-21 [1] Github (tidyverse/purrr@194b8ef)
 R.cache                0.16.0        2022-07-21 [1] CRAN (R 4.2.1)
 R.methodsS3            1.8.2         2022-06-13 [1] CRAN (R 4.2.0)
 R.oo                   1.25.0        2022-06-12 [1] CRAN (R 4.2.0)
 R.utils                2.12.0        2022-06-28 [1] CRAN (R 4.2.1)
 R6                     2.5.1.9000    2022-08-04 [1] Github (r-lib/R6@87d5e45)
 ragg                   1.2.2.9000    2022-09-12 [1] Github (r-lib/ragg@904e145)
 randomForest           4.7-1.1       2022-05-23 [1] CRAN (R 4.2.1)
 RColorBrewer           1.1-3         2022-04-03 [1] CRAN (R 4.2.0)
 Rcpp                   1.0.9         2022-07-08 [1] CRAN (R 4.2.1)
 RCurl                  1.98-1.8      2022-07-30 [1] CRAN (R 4.2.1)
 readr                * 2.1.2.9000    2022-09-20 [1] Github (tidyverse/readr@5cac6ed)
 remotes                2.4.2         2022-09-12 [1] Github (r-lib/remotes@bc0949d)
 reticulate             1.26          2022-08-31 [1] CRAN (R 4.2.1)
 rjson                  0.2.21        2022-01-09 [1] CRAN (R 4.2.0)
 rlang                  1.0.5.9000    2022-09-16 [1] Github (r-lib/rlang@5a25ff4)
 rmarkdown              2.16.1        2022-08-25 [1] Github (rstudio/rmarkdown@b8a9879)
 RSQLite                2.2.17        2022-09-10 [1] CRAN (R 4.2.1)
 Rttf2pt1               1.3.10        2022-02-07 [1] CRAN (R 4.2.0)
 S4Vectors            * 0.34.0        2022-04-26 [1] Bioconductor
 scales                 1.2.1.9000    2022-08-20 [1] Github (r-lib/scales@b3df2fb)
 sessioninfo            1.2.2         2021-12-06 [1] CRAN (R 4.2.0)
 shape                  1.4.6         2021-05-19 [1] CRAN (R 4.2.0)
 shiny                  1.7.2         2022-07-19 [1] CRAN (R 4.2.1)
 stringi                1.7.8         2022-07-11 [1] CRAN (R 4.2.1)
 stringr              * 1.4.1.9000    2022-08-21 [1] Github (tidyverse/stringr@792bc92)
 styler               * 1.7.0.9002    2022-09-21 [1] Github (r-lib/styler@1f4437b)
 SummarizedExperiment * 1.26.1        2022-04-29 [1] Bioconductor
 survival               3.4-0         2022-08-09 [2] CRAN (R 4.2.1)
 systemfonts            1.0.4         2022-02-11 [1] CRAN (R 4.2.0)
 textshaping            0.3.6         2021-10-13 [1] CRAN (R 4.2.0)
 tibble               * 3.1.8.9001    2022-09-20 [1] Github (tidyverse/tibble@b740af1)
 tidyr                * 1.2.1.9000    2022-09-09 [1] Github (tidyverse/tidyr@653def2)
 tidyselect             1.1.2.9000    2022-09-21 [1] Github (r-lib/tidyselect@edd0a3b)
 tidyverse            * 1.3.2.9000    2022-09-12 [1] Github (tidyverse/tidyverse@3be8283)
 tzdb                   0.3.0         2022-03-28 [1] CRAN (R 4.2.0)
 urlchecker             1.0.1         2021-11-30 [1] CRAN (R 4.2.0)
 usethis                2.1.6.9000    2022-09-15 [1] Github (r-lib/usethis@7c34252)
 utf8                   1.2.2         2021-07-24 [1] CRAN (R 4.2.0)
 uwot                   0.1.14        2022-09-12 [1] Github (jlmelville/uwot@e739069)
 vctrs                  0.4.1.9000    2022-09-19 [1] Github (r-lib/vctrs@0a219ba)
 vipor                  0.4.5         2017-03-22 [1] CRAN (R 4.2.0)
 wesanderson            0.3.6.9000    2022-04-22 [1] Github (karthik/wesanderson@2796b59)
 withr                  2.5.0         2022-03-03 [1] CRAN (R 4.2.0)
 xfun                   0.33          2022-09-12 [1] CRAN (R 4.2.1)
 XML                    3.99-0.10     2022-06-09 [1] CRAN (R 4.2.0)
 xtable                 1.8-4         2019-04-21 [1] CRAN (R 4.2.0)
 XVector                0.36.0        2022-04-26 [1] Bioconductor
 yaml                   2.3.5         2022-02-21 [1] CRAN (R 4.2.0)
 yarrr                  0.1.6         2022-04-22 [1] Github (ndphillips/yarrr@e2e4488)
 zlibbioc               1.42.0        2022-04-26 [1] Bioconductor

 [1] /opt/homebrew/lib/R/4.2/site-library
 [2] /opt/homebrew/Cellar/r/4.2.1_4/lib/R/library

─ Python configuration ───────────────────────────────────────────────────────
 python:         /Users/jialei/.pyenv/shims/python
 libpython:      /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/libpython3.9.dylib
 pythonhome:     /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10:/Users/jialei/.pyenv/versions/mambaforge-4.10.3-10
 version:        3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:00:33)  [Clang 13.0.1 ]
 numpy:          /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/python3.9/site-packages/numpy
 numpy_version:  1.22.4
 numpy:          /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/python3.9/site-packages/numpy
 
 NOTE: Python version was forced by RETICULATE_PYTHON

──────────────────────────────────────────────────────────────────────────────